import numpy as np
import pickle
import random
import shutil
from pathlib import Path

# Load the pickled split table once and keep the entry stored under "train"
# at module scope for the sampling code below.
# NOTE: unpickling can run arbitrary code; only point this at a trusted file.
with Path("/datasets/imagenet/splits.pkl").open("rb") as f:
    splits = pickle.load(f)
train_examples = splits["train"]

def add_image(path: Path) -> None:
    """Copy one dataset image into the local ``images`` directory.

    The source is ``/datasets/imagenet/<path>`` and the destination is
    ``images/<path.name>``. The ``images`` directory is created if it is
    missing. If the destination file already exists it is left untouched,
    so repeated calls for the same file name do not copy again.

    Args:
        path: Image location, joined onto ``/datasets/imagenet``. Only its
            final component is used for the destination file name.

    Raises:
        OSError: If the source cannot be read or the destination cannot be
            written (e.g. ``FileNotFoundError`` for a missing source).
    """
    dest = Path("images") / path.name
    if dest.exists():
        return
    # Without this, the very first copy fails when ``images/`` does not exist.
    dest.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(Path("/datasets/imagenet") / path, dest)

# Number of parent concepts; their files on disk are numbered 1..NUM_PARENT_CONCEPTS.
NUM_PARENT_CONCEPTS = 55
# Upper bound on images sampled per sub-concept (and on matching control images).
MAX_IMAGES_PER_CONCEPT = 10


def _sql_quote(value):
    """Return *value* as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + str(value).replace("'", "''") + "'"


def _read_concept_names(naming_path):
    """Return the concept names listed in a naming-results file.

    Every non-blank line must contain ``": "``; the name is everything after
    the first occurrence, so names that themselves contain ``": "`` are kept
    whole. Blank lines (such as a trailing newline) are skipped.

    Raises:
        ValueError: If a non-blank line has no ``": "`` separator.
    """
    concept_names = []
    with open(naming_path, "r") as f:
        for raw_line in f:
            entry = raw_line.strip()
            if not entry:
                continue
            _, separator, concept_name = entry.partition(": ")
            if not separator:
                raise ValueError(f"Malformed line in {naming_path}: {entry!r}")
            concept_names.append(concept_name)
    return concept_names


# For every sub-concept of every parent concept: sample up to
# MAX_IMAGES_PER_CONCEPT images labelled with the concept plus the same number
# of randomly drawn control images, copy them locally, and print one SQL
# INSERT statement per image.
for parent_concept in range(NUM_PARENT_CONCEPTS):
    file_id = parent_concept + 1
    discovered_concept_labels = np.load(f"sampled_discovered_concepts/sampled_discovered_concepts_{file_id}.npy")
    names = _read_concept_names(f"sampled_discovered_concepts/naming_results_{file_id}.txt")

    for sub_concept in range(discovered_concept_labels.shape[1]):
        sub_concept_labels = discovered_concept_labels[:, sub_concept]
        # Use the same "== 1" mask for both the count and the population so a
        # label value other than 0/1 can never be sampled as a positive.
        positive_indices = np.flatnonzero(sub_concept_labels == 1).tolist()
        k = min(len(positive_indices), MAX_IMAGES_PER_CONCEPT)

        with_concept = random.sample(positive_indices, k=k)

        # NOTE(review): controls are drawn from all training examples, so they
        # may overlap with images that carry the concept - confirm this is intended.
        control = random.sample(range(len(train_examples)), k=k)

        concept_literal = _sql_quote(names[sub_concept])
        for control_flag, image_indices in ((0, with_concept), (1, control)):
            for img_idx in image_indices:
                img_path = Path(train_examples[img_idx]["image_path"])
                add_image(img_path)
                print(f"INSERT INTO images (concept, image_file, control) VALUES ({concept_literal}, {_sql_quote(img_path.name)}, {control_flag});")
